#!/bin/env python

import os
import sys
import string
from xml.dom import minidom

#
# opgen.py -- generates tables and constants for decoding
#
# - itab.py
#

#
# special operator types for internal purposes.
#
spl_mnm_types = [
    # placeholder mnemonics
    'd3vil',
    'na',
    # one entry per kind of group (dispatch) table
    'grp_reg',
    'grp_rm',
    'grp_vendor',
    'grp_x87',
    'grp_mode',
    'grp_osize',
    'grp_asize',
    'grp_mod',
    # terminator
    'none',
]

#
# opcode-vendor dictionary
#                
# maps the text of a <vendor> element to its two-digit slot in a
# vendor group table
vend_dict = dict(AMD='00', INTEL='01')


#
# opcode-mode dictionary
#

# maps a size in bits ('16', '32', '64') to its two-digit slot in the
# mode / operand-size / address-size group tables
mode_dict = {
    bits: '%02d' % slot
    for slot, bits in enumerate(('16', '32', '64'))
}

#
# opcode-operand dictionary
#
# Each value is a two-element list: [ operand type, operand size ].

# operands that carry an explicit size or addressing form
operand_dict = {
    'Ap':  ['OP_A', 'SZ_P'],
    'E':   ['OP_E', 'SZ_NA'],
    'Eb':  ['OP_E', 'SZ_B'],
    'Ew':  ['OP_E', 'SZ_W'],
    'Ev':  ['OP_E', 'SZ_V'],
    'Ed':  ['OP_E', 'SZ_D'],
    'Ez':  ['OP_E', 'SZ_Z'],
    'Ex':  ['OP_E', 'SZ_MDQ'],
    'Ep':  ['OP_E', 'SZ_P'],
    'G':   ['OP_G', 'SZ_NA'],
    'Gb':  ['OP_G', 'SZ_B'],
    'Gw':  ['OP_G', 'SZ_W'],
    'Gv':  ['OP_G', 'SZ_V'],
    'Gvw': ['OP_G', 'SZ_MDQ'],
    'Gd':  ['OP_G', 'SZ_D'],
    'Gx':  ['OP_G', 'SZ_MDQ'],
    'Gz':  ['OP_G', 'SZ_Z'],
    'M':   ['OP_M', 'SZ_NA'],
    'Mb':  ['OP_M', 'SZ_B'],
    'Mw':  ['OP_M', 'SZ_W'],
    'Ms':  ['OP_M', 'SZ_W'],
    'Md':  ['OP_M', 'SZ_D'],
    'Mq':  ['OP_M', 'SZ_Q'],
    'Mt':  ['OP_M', 'SZ_T'],
    'I1':  ['OP_I1', 'SZ_NA'],
    'I3':  ['OP_I3', 'SZ_NA'],
    'Ib':  ['OP_I', 'SZ_B'],
    'Isb': ['OP_I', 'SZ_SB'],
    'Iw':  ['OP_I', 'SZ_W'],
    'Iv':  ['OP_I', 'SZ_V'],
    'Iz':  ['OP_I', 'SZ_Z'],
    'Jv':  ['OP_J', 'SZ_V'],
    'Jz':  ['OP_J', 'SZ_Z'],
    'Jb':  ['OP_J', 'SZ_B'],
    'R':   ['OP_R', 'SZ_RDQ'],
    'C':   ['OP_C', 'SZ_NA'],
    'D':   ['OP_D', 'SZ_NA'],
    'S':   ['OP_S', 'SZ_NA'],
    'Ob':  ['OP_O', 'SZ_B'],
    'Ow':  ['OP_O', 'SZ_W'],
    'Ov':  ['OP_O', 'SZ_V'],
    'V':   ['OP_V', 'SZ_NA'],
    'W':   ['OP_W', 'SZ_NA'],
    'P':   ['OP_P', 'SZ_NA'],
    'Q':   ['OP_Q', 'SZ_NA'],
    'VR':  ['OP_VR', 'SZ_NA'],
    'PR':  ['OP_PR', 'SZ_NA'],
}

# fixed operands: the type constant is 'OP_' + the key and the size is
# always SZ_NA.  The keys are listed in their original order, so the
# dictionary's insertion order is unchanged.
operand_dict.update(
    (reg, ['OP_' + reg, 'SZ_NA']) for reg in (
        'AL  CL  DL  BL  AH  CH  DH  BH '
        'AX  CX  DX  BX  SI  DI  SP  BP '
        'eAX eCX eDX eBX eSI eDI eSP eBP '
        'rAX rCX rBX rDX rSI rDI rSP rBP '
        'ES  CS  DS  SS  GS  FS '
        'ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7'
    ).split()
)

# the None key maps to the "no operand" constant
operand_dict[None] = ['OP_NONE', 'SZ_NA']

# combined register operands, again 'OP_' + key with SZ_NA
operand_dict.update(
    (reg, ['OP_' + reg, 'SZ_NA']) for reg in (
        'ALr8b  CLr9b  DLr10b BLr11b AHr12b CHr13b DHr14b BHr15b '
        'rAXr8  rCXr9  rDXr10 rBXr11 rSPr12 rBPr13 rSIr14 rDIr15'
    ).split()
)

operand_dict['jWP'] = ['OP_J', 'SZ_WP']
operand_dict['jDP'] = ['OP_J', 'SZ_DP']

#
# opcode prefix dictionary
# 
pfx_dict = {
    # address-size / operand-size overrides
    'aso': 'P_aso',
    'oso': 'P_oso',
    # REX bits
    'rexw': 'P_rexw',
    'rexb': 'P_rexb',
    'rexx': 'P_rexx',
    'rexr': 'P_rexr',
    # 64-bit mode and mode-dependence flags
    'inv64': 'P_inv64',
    'def64': 'P_def64',
    'depM': 'P_depM',
    # cast flags
    'cast1': 'P_c1',
    'cast2': 'P_c2',
    'cast3': 'P_c3',
}


#
# globals
#
# distinct operand-type / operand-size constants found in operand_dict
opr_constants, siz_constants = [], []
# table name -> { slot index -> entry } and table name -> slot count
tables, table_sizes = {}, {}
# operators in the order they are declared in the XML
mnm_list = []
# operand column used for entries that have no operands
default_opr = ', '.join(['O_NONE'] * 3)


#
# collect the operand/size constants
# 
# every operand_dict value is an [ operand type, operand size ] pair;
# record each distinct constant once, in first-seen order
for opr_const, siz_const in operand_dict.values():
    if opr_const not in opr_constants:
        opr_constants.append(opr_const)
    if siz_const not in siz_constants:
        siz_constants.append(siz_const)

xmlDoc = minidom.parse( 'x86optable.xml' )

#
# look for top-level optable node
#
tlNode = xmlDoc.firstChild
while tlNode is not None and tlNode.localName != 'x86optable':
    tlNode = tlNode.nextSibling

#
# without an <x86optable> element there is nothing to generate; stop
# here with a message instead of failing later on a None node
#
if tlNode is None:
    print ('error: no <x86optable> element found in x86optable.xml.')
    sys.exit(-1)

#
# creates a table entry
#
def centry(i, defmap):
    """Format one table slot as an ``itab_entry ( ... )`` source line.

    Parameters:
        i: slot index of the entry; accepted for the callers' benefit
            but not used when formatting.
        defmap: dictionary describing the slot.  ``defmap['type']``
            selects the layout:

            * a type starting with ``'grp'`` -- the mnemonic column is
              the quoted, lower-cased type, the operands are the
              default operands and the prefix column holds the
              upper-cased ``defmap['name']``;
            * ``'leaf'`` -- ``defmap['name']``, ``defmap['opr']`` and
              ``defmap['pfx']`` are used, each replaced by its default
              (``'na'``, the default operands, ``'P_none'``) when empty;
            * anything else -- an ``'invalid'`` entry.

    Returns:
        The formatted line, terminated by a newline.
    """
    kind = defmap['type']
    if kind[0:3] == 'grp':
        mnm = '\'' + kind.lower() + '\''
        opr = default_opr
        pfx = defmap['name'].upper()
    elif kind == 'leaf':
        # Apply the fallbacks to the raw values.  Testing the mnemonic
        # after it has been wrapped in quotes can never find it empty,
        # so an unnamed leaf would be emitted as '' instead of 'na'.
        mnm = '\'' + (defmap['name'] or 'na') + '\''
        opr = defmap['opr'] or default_opr
        pfx = defmap['pfx'] or 'P_none'
    else:
        mnm = '\'invalid\''
        opr = default_opr
        pfx = 'P_none'

    return '  itab_entry ( %-16s %-26s %s ),\n' % (mnm + ',', opr + ',', pfx)

#
# makes a new table and adds it to the global
# list of tables
#
def mktab(name, size):
    """Register an empty table called `name` with `size` slots.

    A table that is already registered is left untouched, including
    the size it was first registered with.
    """
    if name in tables:
        return
    tables[name] = {}
    table_sizes[name] = size

#
# maps the argument of a /MOD= opcode extension to its slot in a
# grp_mod table
#
mod_dict = { '!11': 0, '11': 1 }

#
# marks slot `table_index` of table `table_name` as a group entry of
# the given kind ('reg', 'rm', 'x87', 'mod', 'osize', 'asize', 'mode'
# or 'vendor') and returns the name of the sub-table it dispatches to.
# the caller is responsible for creating the sub-table with mktab().
#
def mkgroup(table_name, table_index, kind):
    sub_table = '%s__op_%s__%s' % (table_name, table_index, kind)
    tables[table_name][table_index] = {
        'type': 'grp_' + kind,
        'name': sub_table
    }
    return sub_table

#
# walk every <instruction> of the optable and enter each of its
# <opcode> definitions into the decode tables
#
for node in tlNode.childNodes:

    # we are only interested in <instruction>
    if node.localName != 'instruction':
        continue

    iclass  = ''
    vendor  = ''

    # we need the operator attribute
    if not node.hasAttribute('operator'):
        print ('error: no operator given in <instruction>.')
        sys.exit(-1)

    # check if this instruction was already defined.
    # else add it to the global list of operators
    operator = node.getAttribute('operator')
    if operator in mnm_list:
        print ('error: multiple declarations of operator=\'%s\'' % operator)
        sys.exit(-1)
    mnm_list.append(operator)

    #
    # collect instruction
    #   - vendor
    #   - class (read, but not used by anything below)
    #
    for n in node.childNodes:
        if n.localName == 'vendor':
            vendor = n.firstChild.data.strip()
        elif n.localName == 'class':
            iclass = n.firstChild.data

    #
    # for each opcode definition
    #
    for n in node.childNodes:
        if n.localName != 'opcode':
            continue

        parts  = n.firstChild.data.strip().split(';')
        flags  = []
        opr    = []
        pfx    = []
        pfx_c  = []

        # get cast attribute, if given
        if n.hasAttribute('cast'):
            pfx_c.append( 'P_c' + n.getAttribute('cast') )

        # get implicit addressing attribute, if given
        if n.hasAttribute('imp_addr') and int( n.getAttribute('imp_addr') ):
            pfx_c.append( 'P_ImpAddr' )

        # get mode attribute, if given
        if n.hasAttribute('mode'):
            for m in n.getAttribute('mode').split():
                if m in pfx_dict:
                    pfx_c.append(pfx_dict[m])
                else:
                    print ('warning: unrecognized mode attribute \'%s\'' % m)

        #
        # split opcode definition into
        #   1. prefixes (pfx)
        #   2. opcode bytes (opc)
        #   3. operands
        #
        if len(parts) == 1:
            opc = parts[0].split()
        elif len(parts) == 2:
            first  = parts[0].split()
            second = parts[1].split()
            if any(tok in pfx_dict for tok in first):
                # "prefixes; opcode": there are no operands.  The
                # opcode bytes must not be left in opr, or they would
                # be reported as invalid operands and emitted as
                # O_<byte> constants.
                pfx = first
                opc = second
            else:
                # "opcode; operands"
                opc = first
                opr = second
        elif len(parts) == 3:
            pfx = parts[0].split()
            opc = parts[1].split()
            opr = parts[2].split()
        else:
            print ('error: invalid opcode definition of %s\n' % operator)
            sys.exit(-1)

        # Convert opcodes to upper case
        opc = [ op.upper() for op in opc ]

        #
        # check for special cases of instruction translation
        # and ignore them
        #
        if operator in ('pause', 'invalid', 'db') or \
           ( operator == 'nop' and opc[0] == '90' ):
            continue

        #
        # Convert prefix
        #
        for p in pfx:
            if p not in pfx_dict:
                # an unknown prefix has no constant to emit, so stop
                # here like the other definition errors do
                print ('error: invalid prefix specification: %s \n' % p)
                sys.exit(-1)
            pfx_c.append( pfx_dict[p] )
        if len(pfx) == 0:
            pfx_c.append( 'P_none' )
        pfx = '|'.join( pfx_c )

        #
        # Convert operands
        #
        if len(opr) > 3:
            # an entry only has room for three operands
            print ('error: too many operands in definition of %s\n' % operator)
            sys.exit(-1)
        opr_c = [ 'O_NONE', 'O_NONE', 'O_NONE' ]
        for i, o in enumerate(opr):
            if o not in operand_dict:
                # NOTE(review): an unknown operand is only reported;
                # its O_ constant is still emitted, as before.
                print ('error: invalid operand declaration: %s\n' % o)
            opr_c[i] = 'O_' + o
        opr = '%-8s %-8s %s' % (opr_c[0] + ',', opr_c[1] + ',', opr_c[2])

        #
        # walk the opcode tokens from left to right.  table_name and
        # table_index track the slot the definition has reached so
        # far; every extension token turns that slot into a group
        # entry and continues in the group's sub-table.
        #
        table_sse   = ''
        table_name  = 'itab__1byte'
        table_size  = 256
        table_index = ''

        for op in opc:
            if op[0:3] == 'SSE':
                # remembered until the 0F escape that follows
                table_sse = op
            elif op == '0F' and len(table_sse):
                table_name  = 'itab__pfx_%s__0f' % table_sse
                table_size  = 256
                table_sse   = ''
            elif op == '0F':
                table_name  = 'itab__0f'
                table_size  = 256
            elif op[0:5] == '/X87=':
                table_name  = mkgroup(table_name, table_index, 'x87')
                table_index = '%02X' % int(op[5:7], 16)
                table_size  = 64
            elif op[0:4] == '/RM=':
                table_name  = mkgroup(table_name, table_index, 'rm')
                table_index = '%02X' % int(op[4:6])
                table_size  = 8
            elif op[0:5] == '/MOD=':
                table_name  = mkgroup(table_name, table_index, 'mod')
                # the argument is either '!11' (token length 8) or '11'
                if len(op) == 8:
                    v = op[5:8]
                else:
                    v = op[5:7]
                table_index = '%02X' % int(mod_dict[v])
                table_size  = 2
            elif op[0:2] == '/O':
                table_name  = mkgroup(table_name, table_index, 'osize')
                table_index = '%02X' % int(mode_dict[op[2:4]])
                table_size  = 3
            elif op[0:2] == '/A':
                table_name  = mkgroup(table_name, table_index, 'asize')
                table_index = '%02X' % int(mode_dict[op[2:4]])
                table_size  = 3
            elif op[0:2] == '/M':
                # must come after the '/MOD=' test above
                table_name  = mkgroup(table_name, table_index, 'mode')
                table_index = '%02X' % int(mode_dict[op[2:4]])
                table_size  = 3
            elif op[0:6] == '/3DNOW':
                table_name  = 'itab__3dnow'
                table_size  = 256
            elif op[0:1] == '/':
                # any other extension is a single-digit /reg value
                table_name  = mkgroup(table_name, table_index, 'reg')
                table_index = '%02X' % int(op[1:2])
                table_size  = 8
            else:
                # a plain opcode byte selects the slot
                table_index = op

            mktab(table_name, table_size)

        # vendor-specific definitions get one more level of dispatch
        if len(vendor):
            table_name  = mkgroup(table_name, table_index, 'vendor')
            table_index = vend_dict[vendor]
            table_size  = 2
            mktab(table_name, table_size)

        tables[table_name][table_index] = {
            'type' : 'leaf',
            'name' : operator,
            'pfx'  : pfx,
            'opr'  : opr,
            'flags': flags
        }

# ---------------------------------------------------------------------
# Generate itab.py
# ---------------------------------------------------------------------

#
# the size constants are sorted, but no enumeration is emitted for
# them; only a blank line is written where it would go
#
siz_constants.sort()

#
# tables are emitted in sorted name order; a table's position in this
# list is the value of its index constant
#
table_names = sorted(tables.keys())

# the with-block closes the file even if generation fails part-way
with open('itab.py', 'w') as f:

    f.write('\n'
            '# itab.py -- auto generated by opgen.py, do not edit. \n'
            '\n'
            'from inst import *\n'
            'from operand import *\n')

    # placeholder for the size constants
    f.write('\n')

    #
    # index constants of the vendor, mode and mod group tables
    #
    f.write('\n')
    f.write('ITAB__VENDOR_INDX__AMD = 0\n')
    f.write('ITAB__VENDOR_INDX__INTEL = 1\n')
    f.write('\n')

    f.write('\n')
    f.write('ITAB__MODE_INDX__16 = 0\n')
    f.write('ITAB__MODE_INDX__32 = 1\n')
    f.write('ITAB__MODE_INDX__64 = 2\n')
    f.write('\n')

    f.write('\n')
    f.write('ITAB__MOD_INDX__NOT_11 = 0\n')
    f.write('ITAB__MOD_INDX__11 = 1\n')
    f.write('\n')

    #
    # Generate enumeration of the tables
    #
    f.write('\n')
    for i, name in enumerate(table_names):
        f.write('%s = %s\n' % (name.upper(), i))
    f.write('\n')

    #
    # Generate operators list
    #
    f.write('\noperator = [\n')
    for m in mnm_list:
        f.write('  \'%s\',\n' % m)
    f.write(']\n\n')

    f.write('\noperator_spl = [\n')
    for m in spl_mnm_types:
        f.write('  \'%s\',\n' % m)
    f.write(']\n\n')

    #
    # generate operator list (same contents as `operator` above)
    #
    f.write('operators_str = [\n')
    for m in mnm_list:
        f.write('  \'%s\',\n' % m)
    f.write(']\n\n')

    #
    # generate instruction tables; slots without a definition are
    # emitted as invalid entries
    #
    f.write('\n')
    for t in table_names:
        f.write('\n' + t.lower() + ' = [\n')
        for i in range(int(table_sizes[t])):
            index = '%02X' % i
            if index in tables[t]:
                f.write(centry(index, tables[t][index]))
            else:
                f.write(centry(index, {'type': 'invalid'}))
        f.write(']\n')

    #
    # write the instruction table list
    #
    f.write('\n# the order of this table matches itab_index ')
    f.write('\nitab_list = [\n')
    for name in table_names:
        f.write('    %s,\n' % name.lower())
    f.write(']\n')

# vim:expandtab
# vim:sw=4
# vim:ts=4
